# Environment setup: numeric / plotting / torch imports and working directory.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import os
# NOTE(review): machine-specific absolute path — breaks on any other machine.
path = 'C:/Users/Mengjie Ye/Desktop'
os.chdir(path)
import torch.nn as nn
torch.manual_seed(12)  # fix torch RNG for reproducibility
plt.rcParams['savefig.dpi'] = 300  # saved-figure resolution (dpi)
plt.rcParams['figure.dpi'] = 200  # on-screen figure resolution (dpi)
# ---- Load one stock and run quick exploratory analysis ----
stock = pd.read_csv('./hsmd/a.us.txt')
print(stock.head())
# NOTE(review): `new_data` is only defined much later in this file (the
# flattened notebook cells are out of execution order); this line relies on
# live notebook state, not on anything defined above. TODO confirm cell order.
stock = new_data[4].copy()
stock = stock.iloc[2000:, :]  # drop the early history
stock.head()
# Daily log return: log(C_t) - log(C_{t-1}); the first row becomes NaN.
# FIX: renamed from `re`, which shadowed the stdlib `re` module name.
log_ret = np.log(stock.Close).diff(1)
stock['Return'] = log_ret
stock.head()
stock = stock[['Open', 'High', 'Low', 'Close', 'Return', 'Volume']]
# dropna must happen before taking the label, or lengths would differ.
stock.dropna(inplace=True)
Label = stock.Close
print(stock.head(10))
print(stock.shape)
print(stock.info())
print(stock.isnull().sum())
print(stock.describe())
stock[['Open', 'High', 'Low', 'Close']].boxplot()
stock[['Volume']].boxplot()
# BUG FIX: the original line ended with the IPython magic residue `%save`,
# which is a SyntaxError outside a notebook.
stock[['Close']].hist(bins=40)
stock.corr(method='pearson')
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a (multivariate) time series as a supervised-learning table.

    Each row holds the n_in past observations (t-n_in ... t-1) followed by
    the n_out current/future observations (t ... t+n_out-1), one column per
    variable per lag, named 'var<j>(t-<i>)' / 'var<j>(t)' / 'var<j>(t+<i>)'.
    Rows containing NaN (introduced by shifting) are dropped when dropnan.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    frames, headers = [], []
    # Past lags: t-n_in, ..., t-1.
    for lag in range(n_in, 0, -1):
        frames.append(df.shift(lag))
        headers.extend('var%d(t-%d)' % (v + 1, lag) for v in range(n_vars))
    # Current and future steps: t, t+1, ..., t+n_out-1.
    for step in range(n_out):
        frames.append(df.shift(-step))
        if step:
            headers.extend('var%d(t+%d)' % (v + 1, step) for v in range(n_vars))
        else:
            headers.extend('var%d(t)' % (v + 1) for v in range(n_vars))
    # Assemble the lagged columns side by side and name them.
    table = pd.concat(frames, axis=1)
    table.columns = headers
    if dropnan:
        table.dropna(inplace=True)
    return table
# ---- Build supervised dataset: 15 past days of 6 features + today's Open ----
stock_sv = series_to_supervised(stock, 15, 1)
# Drop today's columns except Open (var1(t)) and the target Close (var4(t)):
# positions -1,-2 are Volume(t)/Return(t); -4,-5 are Low(t)/High(t).
Stock = stock_sv.drop(stock_sv.columns[[-1, -2, -4, -5]], axis=1)
print(Stock.head())
# Normalize features and target separately; keep scaler_y so predictions can
# be inverse-transformed back to price scale later.
X = Stock.values[:, :-1]
y = Stock.values[:, -1].reshape(-1, 1)
scaler = preprocessing.MinMaxScaler()
scaler.fit(X)
X = scaler.transform(X)
print(X)
scaler_y = preprocessing.MinMaxScaler()
scaler_y.fit(y)
y = scaler_y.transform(y)
print(y)
# Chronological 70/30 split (shuffle=False preserves time order).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, test_size=0.3, shuffle=False)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
# The LSTM expects (batch, time_step, input_size); the 15 days were already
# flattened into one feature vector, so time_step is 1.
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
y_train, y_test = y_train.reshape((-1, 1)), y_test.reshape((-1, 1))
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
X_train_t = torch.from_numpy(X_train).type(torch.Tensor)
y_train_t = torch.from_numpy(y_train).type(torch.Tensor)
X_test_t = torch.from_numpy(X_test).type(torch.Tensor)
y_test_t = torch.from_numpy(y_test).type(torch.Tensor)
print(X_train_t)
print(X_train_t.shape)
# ---- Hyperparameters ----
EPOCHS = 500
BATCH_SIZE = 20  # NOTE(review): declared but never used — training is full-batch
TIME_STEP = 1    # 15-day window is already flattened into the feature axis
# BUG FIX: was hard-coded to 91; derive from the data so a change in the
# feature layout cannot silently mismatch the network input width.
INPUT_SIZE = X_train.shape[2]
HIDDEN_SIZE = 30
LR = 0.01  # learning rate
OUTPUT_SIZE = 1
class LSTM(nn.Module):
    """Two-layer LSTM regressor.

    Input:  (batch, time_step, input_size) tensor.
    Output: (batch, output_size) — a linear head applied to the hidden
    state of the last time step only.
    """

    def __init__(self, input_size=None, hidden_size=None, output_size=None):
        """Build the network.

        GENERALIZATION: sizes were hard-coded to the module-level globals;
        they now default to INPUT_SIZE / HIDDEN_SIZE / OUTPUT_SIZE (so
        ``LSTM()`` behaves exactly as before) but can be overridden for reuse.
        """
        super(LSTM, self).__init__()
        input_size = INPUT_SIZE if input_size is None else input_size
        hidden_size = HIDDEN_SIZE if hidden_size is None else hidden_size
        output_size = OUTPUT_SIZE if output_size is None else output_size
        self.LSTM = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,  # tensors are (batch, seq, feature)
        )
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the LSTM and regress from the final time step.

        r_out: (batch, time_step, hidden_size);
        h_n / h_c: (n_layers, batch, hidden_size).
        """
        # None -> zero-initialized hidden and cell states.
        r_out, (h_n, h_c) = self.LSTM(x, None)
        res = self.out(r_out[:, -1, :])
        return res
# ---- Training loop (full-batch). Gradients use the scaled MSE; the logged
# losses are inverse-transformed back to the original price scale. ----
Lstm = LSTM()
print(Lstm)
optimizer = torch.optim.Adam(Lstm.parameters(), lr=LR)
loss_func = nn.MSELoss()
hist = np.zeros(EPOCHS)       # per-epoch train loss (price scale)
hist_test = np.zeros(EPOCHS)  # per-epoch test loss (price scale)
for t in range(EPOCHS):
    y_pred = Lstm(X_train_t)
    # FIX: the original wrapped these in requires_grad tensors just for
    # logging; the inverse-scaled loss is a metric only, so use numpy.
    y_pred_inv = scaler_y.inverse_transform(y_pred.detach().numpy().reshape(-1, 1))
    y_train_inv = scaler_y.inverse_transform(y_train.reshape(-1, 1))
    loss_inv = float(np.mean((y_pred_inv - y_train_inv) ** 2))
    loss = loss_func(y_pred, y_train_t)  # scaled loss drives the gradients
    if t % 20 == 0:
        print('Epoch:', t, 'loss', loss_inv)
        print('pred', y_pred_inv[:5])
        print('train', y_train_inv[:5])
    hist[t] = loss_inv
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Track test loss in the original scale (no gradient step on test data).
    test_pre = Lstm(X_test_t)
    y_test_pred_inv = scaler_y.inverse_transform(test_pre.detach().numpy().reshape(-1, 1))
    y_test_inv = scaler_y.inverse_transform(y_test.reshape(-1, 1))
    hist_test[t] = float(np.mean((y_test_pred_inv - y_test_inv) ** 2))
# (Scratch/debug notes from the notebook omitted here.)
# Inverse-transform the final-epoch train predictions and targets back to the
# original price scale for plotting.
y_pred_np = y_pred.detach().numpy()
y_pred_np = scaler_y.inverse_transform(y_pred.detach().numpy().reshape(-1,1))
# NOTE(review): this rebinds y_train to the inverse-scaled values; any later
# reuse of y_train sees prices, not scaled targets.
y_train = scaler_y.inverse_transform(y_train.reshape(-1,1))
# ---- Train-set comparison: real vs predicted closing prices ----
# NOTE(review): `plotly` is only imported near the end of this file — the
# flattened notebook cells are out of execution order; this works only in the
# live notebook session. TODO confirm intended cell order.
layout_train = plotly.graph_objs.Layout(
    title='train',
    autosize=False,
    width=800,
    height=600,
    xaxis=dict(
        title = "Date"
    ),
    yaxis=dict(
        title = "Close"
    )
)
y_pred_np.shape
trace0 = plotly.graph_objs.Scatter(
    # x left as the default integer index
    y=y_train.reshape(y_train.shape[0]),
    mode='lines',
    name='real'
)
trace1 = plotly.graph_objs.Scatter(
    y = y_pred_np.reshape(y_pred_np.shape[0]),
    mode = 'lines',
    name = 'pred'
)
traces_train = [trace0,trace1]
fig_train = plotly.graph_objs.Figure(data=traces_train, layout=layout_train)
plotly.offline.iplot(fig_train)
# Matplotlib version of the same comparison.
train_df=pd.DataFrame({'predict':y_pred_np.reshape(y_pred_np.shape[0]),
                       'real':y_train.reshape(y_train.shape[0])})
train_df['predict'].plot(label='predict',linewidth = 0.3)
train_df['real'].plot(label='real',linewidth = 0.3)
plt.legend()
plt.title('Closing Price Train ')
# ---- Test-set comparison: real vs predicted closing prices ----
test_pred = Lstm(X_test_t).detach().numpy()
# Inverse-transform predictions and targets back to price scale.
test_pred = scaler_y.inverse_transform(Lstm(X_test_t).detach().numpy().reshape(-1,1))
# NOTE(review): rebinds y_test to inverse-scaled prices; later reuse of
# y_test sees prices, not scaled targets.
y_test = scaler_y.inverse_transform(y_test.reshape(-1,1))
layout_test = plotly.graph_objs.Layout(
    title='test',
    autosize=False,
    width=800,
    height=600,
    xaxis=dict(
        title = "Date"
    ),
    yaxis=dict(
        title = "Close"
    )
)
trace0_ = plotly.graph_objs.Scatter(
    y=y_test.reshape(y_test.shape[0]),
    mode='lines',
    name='real'
)
trace1_ = plotly.graph_objs.Scatter(
    y = test_pred.reshape(test_pred.shape[0]),
    mode = 'lines',
    name = 'pred'
)
traces_test = [trace0_,trace1_]
fig_test = plotly.graph_objs.Figure(data=traces_test, layout=layout_test)
plotly.offline.iplot(fig_test)
max((test_pred-y_test))  # largest positive prediction error
min((test_pred-y_test))  # largest negative prediction error
# Matplotlib version of the same comparison.
test_df=pd.DataFrame({'predict':test_pred.reshape(test_pred.shape[0]),
                      'real':y_test.reshape(y_test.shape[0])})
test_df['predict'].plot(label='predict',linewidth = 0.3)
test_df['real'].plot(label='real',linewidth = 0.3)
plt.legend()
plt.title('Closing Price Test')
# ---- Full-period (train then test) comparison ----
data_real = np.append(y_train,y_test)
data_pred = np.append(y_pred_np,test_pred)
layout_data = plotly.graph_objs.Layout(
    title='data',
    autosize=False,
    width=800,
    height=600,
    xaxis=dict(
        title = "Date"
    ),
    yaxis=dict(
        title = "Close"
    )
)
trace0_d = plotly.graph_objs.Scatter(
    y=data_real.reshape(data_real.shape[0]),
    mode='lines',
    name='real'
)
trace1_d = plotly.graph_objs.Scatter(
    y = data_pred.reshape(data_pred.shape[0]),
    mode = 'lines',
    name = 'pred'
)
traces_d = [trace0_d,trace1_d]
fig_d = plotly.graph_objs.Figure(data=traces_d, layout=layout_data)
plotly.offline.iplot(fig_d)
# Matplotlib version, with a dashed vertical line at the train/test boundary.
data_df = pd.concat([train_df,test_df],axis = 0,ignore_index=True)
data_df['predict'].plot(label='predict',linewidth = 0.3)
data_df['real'].plot(label='real',linewidth = 0.3)
plt.legend()
plt.title('Closing Price DATA ')
plt.vlines(train_df.shape[0],data_df.min().min(),data_df.max().max(),colors='r',linestyles='dashed',label='train test',linewidth=0.2)
# ---- Loss curves over epochs (inverse-scaled MSE, from the training loop) ----
layout_loss = plotly.graph_objs.Layout(
    title='loss',
    autosize=False,
    width=800,
    height=600,
    xaxis=dict(
        title = "epoch"
    ),
    yaxis=dict(
        title = "loss"
    )
)
loss0 = plotly.graph_objs.Scatter(
    y=hist,
    mode='lines',
    name='Training loss'
)
loss1 = plotly.graph_objs.Scatter(
    y = hist_test,
    mode = 'lines',
    name = 'Testing loss'
)
loss_trace = [loss0,loss1]
fig_loss = plotly.graph_objs.Figure(data=loss_trace, layout=layout_loss)
plotly.offline.iplot(fig_loss)
plt.figure()
plt.plot(hist, label="Training loss",linewidth=0.3)
plt.plot(hist_test,label="Testing loss",linewidth = 0.3)
plt.legend()
plt.show()
# Losses after epoch 100 (skip the noisy warm-up phase).
plt.figure()
plt.plot(hist[100:], label="Training loss",linewidth=0.3)
plt.plot(hist_test[100:],label="Testing loss",linewidth = 0.3)
plt.legend()
plt.show()
hist[-1]         # final train loss
hist.min()       # best train loss
hist_test.min()  # best test loss
hist_test[-1]    # final test loss
# ---- Final prediction plots ----
prediction = Lstm(X_test_t)
loss = loss_func(prediction,y_test_t)
prediction
y_test_t
plt.figure()
# NOTE(review): `prediction` is still in the normalized [0,1] scale while
# y_test was inverse-transformed to prices above — the two curves here are on
# different scales. TODO confirm whether this plot is intentional.
plt.plot(prediction.detach().numpy(), label="Preds",linewidth = 2.0)
plt.plot(y_test, label="Data")
plt.legend()
plt.show()
y.shape
DATA = y[:,0].reshape((-1,1))  # full normalized target series
print(DATA.shape)
# Stack train + test predictions (both in the normalized scale).
PRED = np.vstack([y_pred.detach().numpy(),prediction.detach().numpy()])
plt.figure()
plt.plot(PRED, label="Preds",linewidth = 2.0)
plt.plot(DATA, label="Data")
plt.legend()
plt.show()
# ---- Second experiment: same pipeline on log-transformed prices ----
stock = pd.read_csv('./hsmd/a.us.txt')
print(stock.head())
stock[['Open','High','Low','Close']]= np.log(stock[['Open','High','Low','Close']])
stock.head()
stock['Return'] = stock.Close.diff(1)  # log-return (Close already in log space)
stock = stock[['Open','High','Low','Close','Volume','Return']]
# dropna must happen before taking the label, or lengths would differ.
stock.dropna(inplace=True)
Label = stock.Close
print(stock.head())
print(stock.shape)
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a time series as a supervised-learning table.

    Columns are the n_in lagged observations (t-n_in ... t-1) followed by the
    n_out current/future ones (t ... t+n_out-1), named 'var<j>(t-<i>)',
    'var<j>(t)' or 'var<j>(t+<i>)'. NaN rows from shifting are dropped when
    dropnan is set.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    # Signed offsets: negative = past lag, zero = now, positive = future.
    offsets = list(range(-n_in, n_out))

    def _name(offset, var):
        # Column label for variable `var` at time offset `offset`.
        if offset < 0:
            return 'var%d(t-%d)' % (var + 1, -offset)
        if offset == 0:
            return 'var%d(t)' % (var + 1)
        return 'var%d(t+%d)' % (var + 1, offset)

    agg = pd.concat([df.shift(-off) for off in offsets], axis=1)
    agg.columns = [_name(off, v) for off in offsets for v in range(n_vars)]
    if dropnan:
        agg.dropna(inplace=True)
    return agg
# ---- Supervised framing for the log-price experiment: 15 past days of all
# features + today's Open; target is today's Close. ----
stock_sv = series_to_supervised(stock, 15, 1)
# Drop today's columns except Open and the Close target (positions -1,-2 are
# Return(t)/Volume(t); -4,-5 are Low(t)/High(t)).
Stock = stock_sv.drop(stock_sv.columns[[-1, -2, -4, -5]], axis=1)
print(Stock.head())
# Normalize the features; the target is converted out of log-space first.
X = Stock.values[:, :-1]
y = Stock.values[:, -1]
scaler = preprocessing.MinMaxScaler()
scaler.fit(X)
X = scaler.transform(X)
print(X)
y = np.exp(y).reshape(-1, 1)  # undo the log transform -> raw closing price
print(X.shape, y.shape)
print(y)
scaler_y = preprocessing.MinMaxScaler()
scaler_y.fit(y)
y = scaler_y.transform(y)
print(y)
# Chronological 70/30 split.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=False)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
# (batch, time_step=1, input_size): the 15-day window is already flattened.
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
y_train, y_test = y_train.reshape((-1, 1)), y_test.reshape((-1, 1))
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
X_train_t = torch.from_numpy(X_train).type(torch.Tensor)
y_train_t = torch.from_numpy(y_train).type(torch.Tensor)
X_test_t = torch.from_numpy(X_test).type(torch.Tensor)
y_test_t = torch.from_numpy(y_test).type(torch.Tensor)
print(X_train_t)
print(X_train_t.shape)
# ---- Hyperparameters ----
EPOCHS = 500
BATCH_SIZE = 20  # NOTE(review): unused — training below is full-batch
TIME_STEP = 1
# BUG FIX: was hard-coded to 91; derive from the data instead.
INPUT_SIZE = X_train.shape[2]
HIDDEN_SIZE = 30
LR = 0.01  # learning rate
OUTPUT_SIZE = 1
class LSTM(nn.Module):
    """Two-layer LSTM regressor for the log-price experiment.

    Input:  (batch, time_step, input_size) tensor.
    Output: (batch, output_size), a linear head on the last time step's
    hidden state.
    """

    def __init__(self, input_size=None, hidden_size=None, output_size=None):
        """Build the network.

        GENERALIZATION: sizes default to the module-level hyperparameters
        (INPUT_SIZE / HIDDEN_SIZE / OUTPUT_SIZE) so ``LSTM()`` is unchanged,
        but explicit arguments allow other shapes.
        """
        super(LSTM, self).__init__()
        input_size = INPUT_SIZE if input_size is None else input_size
        hidden_size = HIDDEN_SIZE if hidden_size is None else hidden_size
        output_size = OUTPUT_SIZE if output_size is None else output_size
        self.LSTM = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,  # tensors are (batch, seq, feature)
        )
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the LSTM (zero initial state) and regress from the last step.

        r_out: (batch, time_step, hidden_size);
        h_n / h_c: (n_layers, batch, hidden_size).
        """
        r_out, (h_n, h_c) = self.LSTM(x, None)
        res = self.out(r_out[:, -1, :])
        return res
# Instantiate the model, optimizer, and loss for the log-price experiment.
Lstm = LSTM()
print(Lstm)
optimizer = torch.optim.Adam(Lstm.parameters(),lr=LR)
loss_func = nn.MSELoss()
# (A scaled-loss version of the training loop was kept in the notebook as
# commented-out scratch; the active loop follows below.)
# ---- Training loop (full-batch). Gradients use the scaled MSE; the logged
# losses are inverse-transformed to the original price scale. ----
hist = np.zeros(EPOCHS)       # per-epoch train loss (price scale)
hist_test = np.zeros(EPOCHS)  # per-epoch test loss (price scale)
for t in range(EPOCHS):
    y_pred = Lstm(X_train_t)
    # FIX: the original wrapped these in requires_grad tensors just for
    # logging; the inverse-scaled loss is a metric only, so use numpy.
    y_pred_inv = scaler_y.inverse_transform(y_pred.detach().numpy().reshape(-1, 1))
    y_train_inv = scaler_y.inverse_transform(y_train.reshape(-1, 1))
    loss_inv = float(np.mean((y_pred_inv - y_train_inv) ** 2))
    loss = loss_func(y_pred, y_train_t)  # scaled loss drives the gradients
    if t % 20 == 0:
        print('Epoch:', t, 'loss', loss_inv)
        print('pred', y_pred_inv[:5])
        print('train', y_train_inv[:5])
    hist[t] = loss_inv
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Evaluate on the test split (no gradient step on test data).
    test_pre = Lstm(X_test_t)
    y_test_pred_inv = scaler_y.inverse_transform(test_pre.detach().numpy().reshape(-1, 1))
    y_test_inv = scaler_y.inverse_transform(y_test.reshape(-1, 1))
    hist_test[t] = float(np.mean((y_test_pred_inv - y_test_inv) ** 2))
# ---- Loss curves and price plots for the log-price experiment ----
plt.figure()
plt.plot(hist, label="Training loss",linewidth=0.3)
plt.plot(hist_test,label="Testing loss",linewidth = 0.3)
plt.legend()
plt.show()
# Train-set real vs predicted prices (inverse-transformed).
y_pred_np = y_pred.detach().numpy()
y_pred_np
y_pred_np = scaler_y.inverse_transform(y_pred.detach().numpy().reshape(-1,1))
# NOTE(review): rebinds y_train/y_test to inverse-scaled prices.
y_train = scaler_y.inverse_transform(y_train.reshape(-1,1))
train_df=pd.DataFrame({'predict':y_pred_np.reshape(y_pred_np.shape[0]),'real':y_train.reshape(y_train.shape[0])})
train_df['predict'].plot(label='predict',linewidth = 0.3)
train_df['real'].plot(label='real',linewidth = 0.3)
plt.legend()
plt.title('Closing Price Train ')
# Test-set real vs predicted prices.
prediction = Lstm(X_test_t).detach().numpy()
test_pred = scaler_y.inverse_transform(Lstm(X_test_t).detach().numpy().reshape(-1,1))
y_test = scaler_y.inverse_transform(y_test.reshape(-1,1))
test_df=pd.DataFrame({'predict':test_pred.reshape(test_pred.shape[0]),
                      'real':y_test.reshape(y_test.shape[0])})
test_df['predict'].plot(label='predict',linewidth = 0.3)
test_df['real'].plot(label='real',linewidth = 0.3)
plt.legend()
plt.title('Closing Price Test')
# Full-period comparison with a train/test boundary marker.
data_df = pd.concat([train_df,test_df],axis = 0,ignore_index=True)
data_df['predict'].plot(label='predict',linewidth = 0.3)
data_df['real'].plot(label='real',linewidth = 0.3)
plt.legend()
plt.title('Closing Price DATA ')
plt.vlines(train_df.shape[0],0,120,colors='r',linestyles='dashed',label='train test',linewidth=0.2)
# ---- Notebook preamble: imports, plotting config, and dataset sampling ----
# NOTE(review): this section appears after code that depends on it — the
# flattened notebook cells are out of execution order.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import os
# BUG FIX: the original plain string 'D:\jupyter ...' contains invalid escape
# sequences (\j, \h, \d, \S); a raw string keeps the backslashes literal.
path = r'D:\jupyter pytorch\hsmd\dataset\Stocks'
os.chdir(path)
import torch.nn as nn
torch.manual_seed(12)  # fix torch RNG for reproducibility
import random
import plotly
import plotly.graph_objs as go
plotly.offline.init_notebook_mode(connected=True)
import heapq
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"  # echo every expression, notebook-style
plt.rcParams['savefig.dpi'] = 400  # saved-figure resolution (dpi)
plt.rcParams['figure.dpi'] = 400  # on-screen figure resolution (dpi)
# Sample 52 non-empty ticker files from the Stocks directory.
# (Reading a file of size zero would throw, so such files are skipped.)
random.seed(12345)
filenames = [x for x in os.listdir() if x.endswith('.txt') and os.path.getsize(x) > 0]
filenames = random.sample(filenames, 52)
print(filenames)
# Read each file into a DataFrame tagged with its ticker label.
data = []
for filename in filenames:
    df = pd.read_csv(filename, sep=',')
    label, _, _ = filename.split(sep='.')
    df['Label'] = label
    df['Date'] = pd.to_datetime(df['Date'])
    data.append(df)
plotly.__version__
temp = data[1].head(10)
print(temp.head(10))
# One plotly line per company: Close price over time.
traces = []
for df in data:
    df = df.sort_values('Date')
    label = df['Label'].iloc[0]  # every row of a file carries the same label
    trace = plotly.graph_objs.Scatter(
        x=df['Date'],
        y=df['Close'],
        mode='lines',
        name=label
    )
    traces.append(trace)
layout = plotly.graph_objs.Layout(
    title='Plot',
    autosize=False,
    width=800,
    height=600,
    xaxis=dict(
        title = "Date"
    ),
    yaxis=dict(
        title = "Close"
    )
)
fig = plotly.graph_objs.Figure(data=traces, layout=layout)
plotly.offline.iplot(fig, filename='dataplot')
# Per-company standard deviation of the closing price (volatility proxy).
stds = []    # Close std per company
labels = []  # company ticker per company
for df in data:
    df = df.sort_values('Date')
    label = df['Label'].iloc[0]
    std = df['Close'].std()
    stds.append(std)
    labels.append(label)
# Return the positions and values of the topk largest elements.
def getMaxIndex(num_list, topk=1):
    """Return (indices, values) of the topk largest elements of num_list.

    BUG FIX: the original mapped each of the topk values back through
    ``num_list.index``, which returns the FIRST occurrence — tied values
    produced duplicate indices. Ranking (index, value) pairs directly keeps
    each element's own position.
    """
    pairs = heapq.nlargest(topk, enumerate(num_list), key=lambda p: p[1])
    max_num_index = [i for i, _ in pairs]
    max_num = [v for _, v in pairs]
    return max_num_index, max_num
# ---- Keep the 10 most volatile companies and re-plot just those ----
max_10_index, max_10_value = getMaxIndex(stds, 10)
new_data = [data[i] for i in max_10_index]
[labels[i] for i in max_10_index]  # echo their tickers (notebook-style output)
new_traces = [traces[i] for i in max_10_index]
new_fig = plotly.graph_objs.Figure(data=new_traces, layout=layout)
# BUG FIX: the original plain string contained '\n' ('...hsmd\newdata...'),
# which Python reads as a newline character inside the filename; a raw
# string keeps every backslash literal.
plotly.offline.iplot(new_fig, filename=r'D:\jupyter pytorch\hsmd\newdata.html')
new_data[0].head()
# Work with the 5th most-volatile company from here on.
stock = new_data[4].copy()
stock.head()